State Results
priors_versions <- c("v1", "v2", "v3", "v4")
versions <- tibble(
version = c("v1", "v2", "v3", "v4"),
vlabel = factor(
c( "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values",
"$P(S_1|untested)$ Centered at Emp. Value"),
levels = c(
"$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value",
"$P(S_1|untested)$ Centered at Emp. Value",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values"
) )
)
state_corrected_path <- here("analysis/results/adj_biweekly_state/")
################################
# ESTIMATED
################################
dates <- readRDS(here("data/date_to_biweek.RDS")) %>%
group_by(biweek)
covidestim <- readRDS(here('data/covidestim/covidestim_biweekly_all_states.RDS')) %>%
select(-date)
corrected <- map_df(priors_versions, ~readRDS(
paste0(state_corrected_path, "adj_",
.x,
".RDS")) %>%
mutate(version = .x)) %>%
left_join(dates, relationship = "many-to-many") %>%
rename(state=fips) %>%
left_join(versions) %>%
mutate(state=toupper(state)) %>%
left_join(covidestim, relationship= "many-to-many")## Joining with `by = join_by(biweek)`
## Joining with `by = join_by(version)`
## Joining with `by = join_by(biweek, state)`
Summarizing Concordance with Covidestim
# corrected %>%
# mutate(in_interval = case_when(
# exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
# exp_cases_lb > infections ~ "Below Interval",
# exp_cases_ub < infections ~ "Above Interval"
# )) %>%
# filter(!is.na(in_interval)) %>%
# group_by(in_interval, state, vlabel) %>%
# summarize(n_per_county=n()) %>%
# group_by(vlabel, in_interval) %>%
# summarize(n_per_version = sum(n_per_county)) %>%
# group_by(vlabel) %>%
# mutate(total = sum(n_per_version)) %>%
# ungroup() %>%
# mutate(prop=n_per_version/total)
by_version <- corrected %>%
mutate(in_interval = case_when(
exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
exp_cases_lb > infections ~ "Below Interval",
exp_cases_ub < infections ~ "Above Interval"
)) %>%
filter(!is.na(in_interval)) %>%
group_by(in_interval, vlabel) %>%
summarize(n=n()) %>%
group_by(vlabel) %>%
mutate(total = sum(n)) %>%
mutate(prop=n/total) ## `summarise()` has grouped output by 'in_interval'. You can override using the
## `.groups` argument.
labels <- by_version %>%
arrange(prop) %>%
pull(vlabel) %>%
as.character() %>%
unique()
by_version %>%
ggplot(aes(x= fct_reorder(vlabel,prop,max),
fill = in_interval, y =prop)) +
geom_bar(stat="identity",position="dodge") +
theme_c() +
coord_flip()+ scale_fill_manual(values=c("In Interval" = "#79D2AF",
"Above Interval" = "#D2AF79",
"Below Interval"="#7997D2")) +
labs(x="",
y= "Proportion",
fill = "Covidestim Median:",
title = "Proportion Where Covidestim Median\nWas Within, Above, or Below the Median",
subtitle = "State Level") +
theme_c() +
theme(legend.position="right",
legend.title = element_text(face="bold", size = 15),
legend.text= element_text( size = 15),
legend.spacing.y = unit(3, 'pt'),
axis.text.y = element_text(size = 17, hjust=1)) +
scale_x_discrete(labels = (TeX(labels)))ggsave(here('presentation/figure/covidestim_concordance_state.jpeg'), height=8, width=14)Testing Rate over Time
subset <- corrected %>%
filter(vlabel == "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$")
subset %>%
ggplot(aes(x=date, y =total/population))+
geom_line() +
facet_wrap(~state) +
theme_c() +
labs(title ="Biweekly Testing Rate",
y = "Total Number of Tests / Population Size")subset %>%
ggplot(aes(x=date, y =total/population))+
geom_line() +
facet_wrap(~state) +
theme_c() +
labs(title ="Biweekly Testing Rate",
y = "Total Number of Tests / Population Size")###########################################################################
# plot relationship between ratio of estimated cases to observed
# against testing rate
###########################################################################
corrected %>%
ggplot(aes(x=total/population, y = exp_cases_median/positive)) +
geom_point(alpha=.008, size=.8) +
facet_wrap(~vlabel,
labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
theme_c(axis.text.x = element_text(size =9),
axis.title = element_text(size=16) ) +
theme(strip.text.x = element_text(size=11, color="white")) +
scale_y_continuous(n.breaks=10) +
geom_hline(yintercept=1, linetype=2,color="darkred", alpha=.8, linewidth=.5) +
labs(y = "Ratio of Estimated Infections to Observed",
x = "Testing Rate",
subtitle = "All Time Intervals and All States",
title = "Relationship Between Testing Rate and\nRatio of Estimated Infections to Observed")+
scale_x_continuous(limits=c(0,.25), n.breaks=4)## Warning: Removed 228 rows containing missing values (`geom_point()`).
ggsave(here('thesis/figure/testing_rate_ratio.jpeg'), width=11,height=7)## Warning: Removed 228 rows containing missing values (`geom_point()`).
corrected %>%
ggplot(aes(x=total, y = exp_cases_median)) +
geom_point(alpha=.008, size=.8) +
facet_wrap(~vlabel,
labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
theme_c(axis.text.x = element_text(size =9),
axis.title = element_text(size=16) ) +
theme(strip.text.x = element_text(size=11, color="white")) +
scale_y_continuous(n.breaks=10) +
labs(x = "Total Tests",
y = "Estimated Infections",
subtitle = "All Time Intervals and All States",
title = "Relationship Between Testing Rate and\nEstimated Infections")# thinking about student populations
# https://nces.ed.gov/ipeds/TrendGenerator/app/trend-table/2/2?trending=column&rid=6
# Source: U.S. Department of Education, National Center for Education Statistics, Integrated Postsecondary Education Data System (IPEDS), 12-month Enrollment component final data (2001-02 - 2019-20) and provisional data (2020-21).
students <- read_csv(here('data/demographic/student_population.csv'), skip=2, n_max=4)## Rows: 4 Columns: 53
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Year
## num (52): Total, Alabama, Alaska, Arizona, Arkansas, California, Colorado, C...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
codes <- read_csv(here('data/demographic/statecodes.csv'))## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): state, abbrev, code
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
students <- students %>%
filter(Year =="2020-21") %>%
select(-Total) %>%
pivot_longer(-c(Year),
names_to="state",
values_to="student_population") %>%
left_join(codes, by= c('state'='state')) %>%
select(state=code, student_population, state_name = state)
corrected %>%
select(state,population,positive,total) %>%
distinct() %>%
left_join(students) %>%
ggplot(aes(x=student_population/population, y = total/population)) +
geom_point()## Joining with `by = join_by(state)`
corrected %>%
# filter(state!="CA") %>%
select(state,population,positive,total) %>%
distinct()%>%
left_join(students) %>%
ggplot(aes(x=fct_reorder(state_name, total/population),
y=total/population,
fill =student_population/population)) +
geom_boxplot() +
scale_fill_binned(type="viridis",
n.breaks=8,
direction=-1,
option="magma") +
theme_c()+
theme(legend.text=element_text(size=8),
axis.text.x=element_text(size=8),
legend.title = element_text(size=19),
axis.title.x= element_text(size=15)) +
coord_flip() +
labs(x="",
fill = "Ratio of Student Population\nto Census Population",
y= "Total Number of Tests Over Population Size",
subtitle =
"Color by Ratio of Student Population\nto Population Reported in the Census",
title = "Testing Rate by State Across All 2-Week Intervals") +
scale_y_continuous(n.breaks =6)## Joining with `by = join_by(state)`
ggsave(here::here(paste0('thesis/figure/college_populations.jpeg')),
height=11, width = 11, dpi=400)
corrected %>%
mutate(testrate=total/population) %>%
filter(testrate>.25) %>%
# filter( abs(testrate - max(testrate)) <=.05) %>%
select(date,testrate, state) %>%
distinct() Variant Data
#
# test %>%
# flatten()
#
# names <- -jsonlite::fromJSON(readLines("data/lineage_data_full.json"),
# flatten=TRUE)
#
# test <- jsonlite::fromJSON("https://github.com/cov-lineages/lineages-website/blob/master/_data/lineage_data.full.json?raw=true",
# flatten = TRUE,
# simplifyVector = TRUE)
#
# test <- jsonlite::read_json("https://github.com/cov-lineages/lineages-website/blob/master/_data/lineage_data.full.json?raw=true",
# simplifyVector=TRUE)
#
#
# test %>%
# flatten() %>%
# select(Lineage,)
# unnest() %>%
# glimpse()
#
#
# test %>%
# as.data.frame()
variant %>%
mutate(week_end_date = substr(week_ending, 1,10),
week_end_date=ymd(week_end_date)) %>%
filter(week_end_date==max(week_end_date)) %>%
left_join(names,by=c('variant'='lineage')) # https://dev.socrata.com/foundry/data.cdc.gov/jr58-6ysp
variant <- "https://data.cdc.gov/resource/jr58-6ysp.json?$limit=50000&$where=week_ending between '2021-02-18T00:00:00.000' and '2022-03-01T00:00:00.000'&usa_or_hhsregion=USA"
variant <- httr::GET(URLencode(variant))
# only go to the 6th because these are already weekly counts
# get the last couple weeks of 2021
variant <-jsonlite::fromJSON(
httr::content(variant,
as = "text",
encoding = "UTF-8"),
simplifyVector = TRUE,
flatten = TRUE) %>%
as_tibble()
saveRDS(variant, 'data/variant_prop.RDS')variant <- readRDS(here('data/variant_prop.RDS'))
# https://www.cdc.gov/coronavirus/2019-ncov/variants/variant-classifications.html
variant <- variant %>%
mutate(week_end_date = substr(week_ending, 1,10),
week_end_date=ymd(week_end_date)) %>%
mutate(variant_category = case_when(
grepl("B[.]1[.]1[.]529|BA[.]1|BA[.]2|BA[.]4|BA[.]5", variant) ~ "Omicron",
variant %in% c("B.1.621", "B.1.621.1") ~ "Mu",
variant == "B.1.1.7" ~ "Alpha",
grepl("B[.]1[.]351", variant) ~ "Beta",
grepl("P.1", variant) ~"Gamma",
grepl("B[.]1[.]617[.]2", variant) ~ "Delta",
variant %in% c("B.1.427" ,"B.1.429") ~ "Epsilon",
variant == "B.1.525" ~ "Eta",
variant == "Other" ~ "Other"
)) %>%
mutate(share = as.numeric(share),
creation_date = ymd(substr(creation_date,1,10))) %>%
filter(modeltype == "weighted" & time_interval=="weekly") %>%
group_by(variant, week_end_date, variant_category, time_interval) %>%
slice_max(n=1, order_by=creation_date) %>%
group_by(variant_category, week_end_date, time_interval) %>%
summarize(share =sum(share, na.rm=TRUE))## `summarise()` has grouped output by 'variant_category', 'week_end_date'. You
## can override using the `.groups` argument.
variant <- variant %>%
filter(variant_category != "Other") %>%
select(week_end_date, variant_category, share) %>%
# week beginning rather than week end
mutate(week = week_end_date - days(7)) %>%
filter(!is.na(variant_category)) varpal <- c("#7BD8DA","#709f0f", "#e71761", "#9245c8",
"#97481c", "#5054b1", "#DA7BA8", "#26B392")
variant %>% ggplot(aes(x=week, y = share,
color=variant_category, group = variant_category)) +
geom_line(linewidth=1.1) +
geom_point(alpha=.5, size=.8) +
scale_color_manual(values=varpal) +
# scale_color_brewer(palette="Set3") +
theme_c() +
scale_x_date(date_breaks="2 months", date_labels = "%b %Y") +
labs(y = "Variant Proportion", x = "Date", color = "Variant") +
guides(color =guide_legend(override.aes = list(linewidth=2))) +
theme(legend.title = element_text(size = 18, face="bold"),
axis.text.y = element_text(size = 15),
axis.text.x=element_text(size=13),
axis.title=element_text(size=16)) +
labs(title = "Variant Proportions Over Time in the U.S.",
subtitle = "for Variants that the WHO Designated\nVariant of Interest or Variant of Concern")ggsave(here('thesis/figure/variant_prop.jpeg'), width =9, height=4, dpi=400)Ratio of Unobserved to Observed
Considering version using \(P(S_1|\text{untested})\) centered at empirical value.
subset %>%
ggplot(aes(x=date, ymin = exp_cases_lb/positive,
ymax = exp_cases_ub/positive)) +
geom_ribbon() +
theme_c()+
facet_wrap(~state)# max average ratio across all states
subset %>%
mutate(ratio = exp_cases_median/positive) %>%
group_by(biweek) %>%
summarize(m = mean(ratio)) %>%
filter(m==max(m)) %>%
left_join(dates)## Joining with `by = join_by(biweek)`
subset %>%
mutate(ratio =exp_cases_median/positive) %>%
group_by(biweek) %>%
mutate(m=median(ratio)) %>%
select(date, m) %>%
distinct() %>%
ggplot(aes(x=date, y =m)) +
geom_point() +
theme_c()## Adding missing grouping variables: `biweek`
subset %>% filter(state=="MI") %>%
mutate(ratio =positive/total) %>%
group_by(biweek) %>%
mutate(m=median(ratio)) %>%
select(date, m) %>%
distinct() %>%
ggplot(aes(x=date, y =m)) +
geom_point() +
theme_c()## Adding missing grouping variables: `biweek`
subset %>% filter(state=="MI") %>%
mutate(ratio =exp_cases_median) %>%
group_by(biweek) %>%
mutate(m=median(ratio)) %>%
select(date, m) %>%
distinct() %>%
ggplot(aes(x=date, y =m)) +
geom_point() +
theme_c()## Adding missing grouping variables: `biweek`
subset %>% filter(state=="MI") %>%
mutate(ratio =positive/total) %>%
group_by(biweek) %>%
mutate(m=median(ratio)) %>%
select(date, m) %>%
distinct() %>%
ggplot(aes(x=date, y =m)) +
geom_point() +
theme_c()## Adding missing grouping variables: `biweek`
subset %>% filter(state=="MI") %>%
mutate(ratio =exp_cases_median) %>%
group_by(biweek) %>%
mutate(m=median(ratio)) %>%
select(date, m) %>%
distinct() %>%
ggplot(aes(x=date, y =m)) +
geom_point() +
theme_c()## Adding missing grouping variables: `biweek`
heatmap_dat <- subset %>%
group_by(biweek, state, exp_cases_median, positive) %>%
summarize(date = min(date)) %>%
ungroup() %>%
rename(code=state) %>%
left_join(codes) %>%
select(-c(code,abbrev))## `summarise()` has grouped output by 'biweek', 'state', 'exp_cases_median'. You
## can override using the `.groups` argument.
## Joining with `by = join_by(code)`
# group similar states together
wide <- heatmap_dat %>%
mutate(ratio= exp_cases_median/positive) %>%
select(-c(exp_cases_median, positive)) %>%
pivot_wider(names_from=state,values_from =ratio)
dist_mat <- dist(t(wide[3:ncol(wide)]), method = 'euclidean')
hclust_avg <- hclust(dist_mat, method = 'average')
plot(hclust_avg)cluster_order <- tibble(order = hclust_avg$order,
state = hclust_avg$labels[hclust_avg$order])
heatmap_dat %>%
mutate(ratio=exp_cases_median/positive) %>%
mutate(state=factor(state, levels= hclust_avg$labels[hclust_avg$order])) %>%
mutate(date_lab = format(as.POSIXct(date),format="%b %Y")) %>%
group_by(state) %>%
mutate(m = median(ratio)) %>%
ungroup() %>%
ggplot(aes(x=fct_reorder(date_lab, date),
y = fct_reorder(state,m),
fill =exp_cases_median/positive)) +
geom_tile() +
# scale_x_discrete(breaks=breaks_date, labels = breaks_date) +
viridis::scale_fill_viridis(option="rocket", direction=-1) +
labs(x="Date",
y= "State",
fill = "Ratio of Estimated Cases\nto Observed",
title = "Ratio of Median Estimated Infections to Observed Infections") +
theme_c() +
theme(axis.text.x = element_text(size=11, angle=30),
axis.text.y = element_text(size = 13),
axis.title = element_text(size=16),
legend.title = element_text(face="bold", size=18))heatmap_dat %>%
mutate(ratio=exp_cases_median/positive) %>%
group_by(biweek) %>%
summarize(m=median(ratio),
date=min(date)) %>%
arrange(desc(m))ggsave(here('thesis/figure/heatmap_ratio_est_observed.jpeg'),
height=10,
width =13,
dpi=400)Intervals by State and Biweek
pal <- c("red", "#10BAC5", "#1B10C5", "#EFB719", "#900C3F")
names(pal) <- c("Covidestim",
'$P(S_1|untested)$ Centered at Emp. Value',
'$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values',
"$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value"
)Only Michigan
# pb max
m1 <- corrected %>%
filter(state == "MI") %>%
pull(exp_cases_ub) %>%
max()
# covidestim max
m2 <- corrected %>%
filter(state == "MI") %>%
pull(infections.hi) %>%
max()
m <- max(m1,m2)
varpal <- c("#7BD8DA","#709f0f", "#e71761", "#9245c8",
"#97481c", "#5054b1", "#DA7BA8", "#26B392")
corrected %>%
filter(state == "MI" & version %in% c("v1","v4")) %>%
# filter(state %in% sample(corrected$state,5)) %>%
filter(biweek >=6) %>%
group_by(biweek) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5,
show.legend=FALSE) +
geom_ribbon(aes(x = date,
fill = 'Covidestim',
ymin = infections.lo,
ymax = infections.hi),
alpha = .5) +
facet_grid(~vlabel, scales = "free_y",
labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 40, size = 16),
axis.title.y = element_text(size=13),
plot.title=element_text(face = "bold",
hjust = .5,
size = 24,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 22,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 22),
plot.subtitle = ggtext::element_markdown(size = 25,margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 17),
legend.title = element_text(size=18, face="bold")) +
geom_line(aes(x=week, y=share*m,
color =variant_category,
group=variant_category),
data=variant,
linewidth=1.3) +
scale_fill_manual(values =pal,
labels = TeX(names(pal)),
breaks='Covidestim',
name='') +
scale_y_continuous(sec.axis = sec_axis( trans=~./m, name="Variant Proportion"),
labels = comma) +
labs(y = "Estimated Infections",
title = paste0("Estimated Infections by Version in Michigan"),
color = "SARS-CoV-2 Variant",
x="Date") +
guides(color = guide_legend(override.aes = list(linewidth =3),
ncol=2,title.position="top")) +
# scale_color_brewer(palette="Accent")
scale_color_manual(values=varpal)ggsave(here::here(paste0('thesis/figure/michigan_variant.pdf')),
height=10, width = 15, dpi=400)
corrected %>%
filter(state == "TX" & version %in% c("v1","v4")) %>%
# filter(state %in% sample(corrected$state,5)) %>%
filter(biweek >=6 ) %>%
group_by(biweek) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5,
show.legend=FALSE) +
geom_ribbon(aes(x = date,
fill = 'Covidestim',
ymin = infections.lo,
ymax = infections.hi),
alpha = .5) +
facet_grid(~vlabel, scales = "free_y",
labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c() +
theme(axis.text.x = element_text(angle = 60, size = 16),
axis.title.y = element_text(size=13),
axis.title.x = element_text(size=13),
plot.title=element_text(face = "bold",
hjust = .5,
size = 24,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 22,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 22),
plot.subtitle = ggtext::element_markdown(size = 25,margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 17),
legend.title = element_text(size=18, face="bold")) +
# geom_line(aes(x=week, y=share*m,
# color =variant_category,
# group=variant_category),
# data=variant,
# linewidth=1.3) +
scale_fill_manual(values =pal,
labels = TeX(names(pal)),
breaks='Covidestim',
name='') +
scale_y_continuous(
labels = comma) +
labs(y = "Estimated Infections",
title = paste0("Estimated Infections by Version in Texas"),
color = "SARS-CoV-2 Variant",
x="Date") +
guides(color = guide_legend(override.aes = list(linewidth =3),
ncol=2,title.position="top")) ggsave(here::here(paste0('thesis/figure/texas-lag.jpeg')),
height=8, width = 15, dpi=400)
corrected %>%
filter(state=="MI") %>%
filter(biweek >=6) %>%
select(date, total, positive,biweek) %>%
group_by(biweek) %>%
summarize(date=min(date),
positive=unique(positive),
total=unique(total)) %>%
mutate(pct_change_total =( total-lag(total,n=1))/lag(total,n=1),
pct_change_positive =( positive-lag(positive,n=1))/lag(positive,n=1)) %>%
pivot_longer(c(pct_change_total,pct_change_positive)) %>%
mutate(name = gsub("pct_change_", "", name),
name = paste(name, "tests")) %>%
ggplot(aes(x=date,y=value, color=name, group=name)) +
geom_hline(yintercept=0,alpha=.4) +
geom_line(linewidth =1.05) +
geom_point(alpha = .5) +
scale_y_continuous(labels=scales::percent) +
scale_color_manual(values = c("#268DB3", "#B38726"), name='') +
theme_c() +
theme(axis.title.x =element_text(size=14),
axis.title.y =element_text(size=14),
axis.text.x = element_text(size=11.5),
axis.text.y = element_text(size=13)) +
guides(color = guide_legend(override.aes = list(linewidth=2))) +
labs(y = "Percent Change in Tests",
x = "Date",
title = "Percent Change in Positive Tests and Total Tests\nin Michigan") +
scale_x_date(date_breaks="1.5 months", date_labels = "%b %Y") ## Warning: Removed 2 rows containing missing values (`geom_line()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
ggsave(here::here(paste0('thesis/figure/test_capacity.pdf')),
height=6, width = 13, dpi=400)## Warning: Removed 2 rows containing missing values (`geom_line()`).
## Removed 2 rows containing missing values (`geom_point()`).
corrected %>%
filter(state=="TX" | state=="MI") %>%
filter(biweek >=6) %>%
select(date, total, positive,biweek,state) %>%
group_by(biweek,state) %>%
summarize(date=min(date),
positive=unique(positive),
total=unique(total)) %>%
group_by(state) %>%
arrange(biweek) %>%
mutate(pct_change_total =( total-lag(total,n=1))/lag(total,n=1),
pct_change_positive =( positive-lag(positive,n=1))/lag(positive,n=1)) %>%
pivot_longer(c(pct_change_total,pct_change_positive)) %>%
mutate(name = gsub("pct_change_", "", name),
name = paste(name, "tests")) %>%
ggplot(aes(x=date,y=value, color=name, group=name)) +
geom_hline(yintercept=0,alpha=.4) +
geom_line(linewidth =1.05) +
geom_point(alpha = .5) +
scale_y_continuous(labels=scales::percent) +
scale_color_manual(values = c("#268DB3", "#B38726"), name='') +
theme_c() +
theme(axis.title.x =element_text(size=14),
axis.title.y =element_text(size=14),
axis.text.x = element_text(size=11.5),
axis.text.y = element_text(size=13)) +
guides(color = guide_legend(override.aes = list(linewidth=2))) +
labs(y = "Percent Change in Tests",
x = "Date",
title = "Percent Change in Positive Tests and Total Tests\nin Michigan") +
scale_x_date(date_breaks="1.5 months", date_labels = "%b %Y") +
facet_wrap(~state)## `summarise()` has grouped output by 'biweek'. You can override using the
## `.groups` argument.
## Warning: Removed 2 rows containing missing values (`geom_line()`).
## Warning: Removed 4 rows containing missing values (`geom_point()`).
corrected %>%
filter(state=="TX") %>%
filter(version =="v4") %>%
group_by(biweek,state) %>%
summarize(date=min(date),
positive=unique(positive),
total=unique(total),
infections=unique(infections),
exp_cases_median=unique(exp_cases_median)) %>%
ggplot(aes(x=date)) +
geom_line(aes(y=exp_cases_median)) +
geom_line(aes(y=infections),color="red")## `summarise()` has grouped output by 'biweek'. You can override using the
## `.groups` argument.
corrected %>%
filter(state=="TX") %>%
filter(version =="v4") %>%
group_by(biweek,state) %>%
summarize(date=min(date),
positive=unique(positive),
total=unique(total),
infections=unique(infections),
exp_cases_median=unique(exp_cases_median)) %>%
ggplot(aes(x=date)) +
geom_line(aes(y=positive)) +
geom_line(aes(y=(positive/total)*807598),color="red")## `summarise()` has grouped output by 'biweek'. You can override using the
## `.groups` argument.
corrected %>%
filter(state=="MI") %>%
select(date, total, positive,biweek) %>%
group_by(biweek) %>%
summarize(date=min(date),
positive=unique(positive),
total=unique(total)) %>%
ggplot(aes(x=date,y=positive/total)) +
geom_line() +
geom_point() corrected %>%
select(date, total, positive,biweek,state) %>%
distinct() %>%
filter(biweek>=6) %>%
group_by(biweek,state) %>%
summarize(date=min(date),
positive=unique(positive),
total=unique(total)) %>%
group_by(state) %>%
mutate(pct_change_total =( total-lag(total,n=1, order_by=date))/lag(total,n=1, order_by=date),
pct_change_positive =( positive-lag(positive,n=1, order_by=date))/lag(positive,n=1, order_by=date)) %>%
pivot_longer(c(pct_change_total,pct_change_positive)) %>%
mutate(name = gsub("pct_change_", "", name),
name = paste(name, "tests")) %>%
ggplot(aes(x=date,y=value, color=name, group=name)) +
geom_line(linewidth =1.05) +
geom_point(alpha = .5) +
scale_y_continuous(labels=scales::percent) +
scale_color_manual(values = c("#268DB3", "#B38726"), name='') +
theme_c() +
theme(axis.text.x=element_text(size=12, angle=30),
legend.position="top") +
facet_wrap(~state,scales="free_y", ncol=7)+
guides(color = guide_legend(override.aes = list(linewidth=3))) +
labs(y = "Percent Change in Tests",
x = "Date",
title = "Percent Change in Positive Tests and Total Tests\nin All States")## `summarise()` has grouped output by 'biweek'. You can override using the
## `.groups` argument.
## Warning: Removed 2 rows containing missing values (`geom_line()`).
## Warning: Removed 102 rows containing missing values (`geom_point()`).
ggsave(here::here(paste0('thesis/figure/test_capacity_all_states.pdf')),
height=13, width = 13, dpi=400)## Warning: Removed 2 rows containing missing values (`geom_line()`).
## Removed 102 rows containing missing values (`geom_point()`).
Comparing Data Sources
Texas
source <- read_csv('https://media.githubusercontent.com/media/covidestim/covidestim-sources/master/data-sources/jhu-states.csv')
saveRDS(source, here('data/covidestim/state_source.RDS'))source <- readRDS(here('data/covidestim/state_source.RDS'))
covidestim <- source %>%
filter(date >"2021-01-01" & date <= "2022-03-03")%>%
mutate(week=week(date),
year=year(date)) %>%
# add tiny week to previous week
mutate(week = ifelse(week==53 & year == 2021,52,week)) %>%
group_by(year,week, state) %>%
summarize(date =min(date),cases=sum(cases))## `summarise()` has grouped output by 'year', 'week'. You can override using the
## `.groups` argument.
tests_weekly <- readRDS(here("data/state_level/tests_daily_all_states.RDS")) %>%
mutate(week=week(date),
year=year(date)) %>%
mutate(week = ifelse(week==53 & year == 2021,52,week)) %>%
group_by(year,week, state) %>%
summarize(date =min(date), total = sum(total), total=sum(total),positive=sum(positive)) %>%
mutate(source="CDC Positive Tests") %>%
rename(cases=positive)## `summarise()` has grouped output by 'year', 'week'. You can override using the
## `.groups` argument.
tests_weekly_texas <- tests_weekly %>%
filter(state=="TX")
covidestim <- covidestim %>%
mutate(source="JHU Cases")
covidestim %>%
filter(state=="Texas") %>%
bind_rows(tests_weekly) %>%
ggplot(aes(x=date,y=cases, color=source))+
geom_line() dates <- readRDS(here('data/date_to_biweek.RDS'))
covidestim %>%
filter(state=="Texas") %>%
bind_rows(tests_weekly_texas) %>%
left_join(dates) %>%
group_by(biweek, state,source) %>%
summarize(date=min(date),
total=sum(total),
cases=sum(cases)) %>%
mutate(testpos = ifelse(source == "CDC Positive Tests", cases/total, NA)) %>%
filter(biweek >=2) %>%
ungroup()%>%
ggplot(aes(x=date))+
geom_line(aes(y=cases, color=source)) +
geom_line(aes(y=testpos*10e5, group=source, color='Test Positivity'),
linetype=2) +
#scale_linetype_discrete(breaks = c(1,1,2)) +
scale_color_manual(values=c("JHU Cases" = "#224EAF",
"CDC Positive Tests"= "#279143",
"Test Positivity"= '#E38E4F')) +
guides(color = guide_legend(override.aes = list(linetype = c(1,1,2)))) +
theme_c() +
theme(axis.title = element_text(size=15),
axis.text.x=element_text(size=12)) +
scale_y_continuous(labels=comma, sec.axis = sec_axis(~./10e5 , name = 'Test Positivity')) +
labs(color ='',
title = 'Comparing Data Sources for Texas',
y="Total Over 2-week Interval",
x= "Date")## Joining with `by = join_by(date)`
## `summarise()` has grouped output by 'biweek', 'state'. You can override using
## the `.groups` argument.
## Warning: Removed 29 rows containing missing values (`geom_line()`).
#
# ggsave(here::here(paste0('thesis/figure/texas_lag.pdf')),
# height=10, width = 15, dpi=400)
ggsave(here('thesis/figure/texas-data-sources.jpeg'),height=5, width=10, dpi=400)## Warning: Removed 29 rows containing missing values (`geom_line()`).
All States
library(cowplot)##
## Attaching package: 'cowplot'
## The following object is masked from 'package:lubridate':
##
## stamp
###########################################
# across all of U.S., faceted by state
###########################################
pltlist <- covidestim %>%
left_join(codes ) %>%
rename(state_name =state, state=code) %>%
bind_rows(tests_weekly) %>%
left_join(dates) %>%
group_by(biweek, state,source) %>%
summarize(date=min(date),
total=sum(total),
cases=sum(cases)) %>%
filter(!is.na(state)) %>%
mutate(testpos = ifelse(source == "CDC Positive Tests", cases/total, NA)) %>%
filter(biweek >=2) %>%
ungroup()%>%
group_by(state) %>%
group_split() %>%
map( ~{
adj <- .x %>% pull(cases) %>% max()
adj <- 1.8*adj
state <- .x %>%pull(state) %>% unique()
.x %>% ggplot(aes(x=date))+
geom_line(aes(y=cases, color=source)) +
geom_line(aes(y=testpos*adj, group=source, color='Test Positivity'),
linetype=2) +
#scale_linetype_discrete(breaks = c(1,1,2)) +
scale_color_manual(values=c("JHU Cases" = "#224EAF",
"CDC Positive Tests"= "#279143",
"Test Positivity"= '#E38E4F')) +
guides(color = guide_legend(override.aes = list(linetype = c(1,1,2)))) +
theme_c() +
theme(axis.title = element_text(size=15),
axis.text.x=element_text(size=12),
legend.position="none") +
scale_y_continuous(labels=comma, sec.axis = sec_axis(~./adj , name = 'Test Positivity')) +
labs(color ='',
title = state,
y="Value",
x= "Date") }
) ## Joining with `by = join_by(state)`
## Joining with `by = join_by(date)`
## `summarise()` has grouped output by 'biweek', 'state'. You can override using
## the `.groups` argument.
n <- length(pltlist)
names(pltlist) <- 1:length(pltlist)
pltlist_1 <-map(1:(n/2), ~ pltlist[[.x]])
pltlist_2 <-map((n/2 +1):n, ~ pltlist[[.x]])
title_gg <- ggplot() +
labs(title = "Comparing CDC Positive Tests to JHU Cases") +
theme(plot.title=element_text(face="bold", hjust = .5, size = 30, margin =margin(5,0,2,0)))
legend <- get_legend(
pltlist[[1]] +
guides(color = guide_legend(
nrow = 1,
override.aes = list(
linewidth=4))) +
theme(legend.position = "top",
legend.text = element_text(size = 20))
)## Warning: Removed 29 rows containing missing values (`geom_line()`).
# png(here::here("thesis/figure/melded.png"), width =500,height=500)
title_row <- cowplot::plot_grid(title_gg, legend,nrow=2)
plot1 <- plot_grid( plotlist=pltlist_1, ncol=4)## Warning: Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
plot2 <- plot_grid( plotlist=pltlist_2, ncol=4)## Warning: Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
## Removed 29 rows containing missing values (`geom_line()`).
plot_grid(title_row, plot1, rel_heights=c(.05,.95), nrow=2)ggsave(here('thesis/figure/jhu_cdc_all_states_1.jpeg'), height=25, width=19)
plot_grid(title_row, plot2, rel_heights=c(.05,.95), nrow=2)ggsave(here('thesis/figure/jhu_cdc_all_states_2.jpeg'), height=25, width=19)All States – Break up into 2 groups
states_all_versions <- corrected %>%
select(state,version) %>%
group_by(state) %>%
summarize(n_versions = n_distinct(version)) %>%
filter(n_versions ==4)
cat(paste0("### Number of states with 4 versions is: ", states_all_versions$state %>% unique() %>% length()))## ### Number of states with 4 versions is: 27
cat(paste0("### Number of states with at least 1 versions is: ", corrected$state %>% unique() %>% length()))## ### Number of states with at least 1 versions is: 51
end <- length(unique(states_all_versions$state))
n <- floor(end/2)
# first group
corrected %>%
filter(state %in% states_all_versions$state[1:n]) %>%
# filter(state %in% sample(corrected$state,5)) %>%
filter(biweek >=6) %>%
group_by(biweek) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5,
show.legend=FALSE) +
geom_ribbon(aes(x = date,
fill = 'Covidestim',
ymin = infections.lo,
ymax = infections.hi),
alpha = .5) +
# geom_linerange(aes(xmin = xmin,
# xmax=xmax,
# y = positive,
# color = 'obs')) +
facet_grid(state~vlabel, scales = "free_y",
labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 60, size = 16),
plot.title=element_text(face = "bold",
hjust = .5,
size = 20,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 10),
plot.subtitle = ggtext::element_markdown(size = 25,margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 14)) +
scale_fill_manual(values =pal,
labels = TeX(names(pal)),
name='',
breaks="Covidestim") +
labs(y = "Estimated Infections",
title = paste0("Estimated Infections by Version Across the United States")) +
scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2),
nrow=2,
byrow=TRUE))ggsave(here::here(paste0('thesis/figure/state_comp_covidestim1.pdf')),
height=24, width = 17, dpi=400)
corrected %>%
filter(state %in% states_all_versions$state[n+1:end]) %>%
# filter(state %in% sample(corrected$state,5)) %>%
filter(biweek >=6) %>%
group_by(biweek) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .6,
show.legend=FALSE) +
geom_ribbon(aes(x = date,
fill = 'Covidestim',
ymin = infections.lo,
ymax = infections.hi),
alpha = .8) +
# geom_linerange(aes(xmin = xmin,
# xmax=xmax,
# y = positive,
# color = 'obs')) +
facet_grid(state~vlabel, scales = "free_y",
labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 60, size = 16),
plot.title=element_text(face = "bold",
hjust = .5,
size = 20,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 10),
plot.subtitle = ggtext::element_markdown(size = 25,margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 14)) +
scale_fill_manual(values =pal,
labels = TeX(names(pal)),
name='',
breaks="Covidestim") +
labs(y = "Estimated Infections",
title = paste0("Estimated Infections by Version Across the United States")) +
scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2),
nrow=2,
byrow=TRUE),
fill="none")ggsave(here::here('thesis/figure/state_comp_covidestim2.pdf'),
height=24, width = 17, dpi=400)#######################
# for presentation
#######################
corrected %>%
filter(state %in% c("MA", "MI")) %>%
# filter(state %in% sample(corrected$state,5)) %>%
filter(biweek >=6) %>%
group_by(biweek) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5,
show.legend=FALSE) +
geom_ribbon(aes(x = date,
fill = 'Covidestim',
ymin = infections.lo,
ymax = infections.hi),
alpha = .5) +
# geom_linerange(aes(xmin = xmin,
# xmax=xmax,
# y = positive,
# color = 'obs')) +
facet_grid(state~vlabel, scales = "free_y",
labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 40, size = 10),
axis.text.y = element_text( size = 7),
plot.title=element_text(face = "bold",
hjust = .5,
size = 20,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 10),
plot.subtitle = ggtext::element_markdown(size = 25,margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 14)) +
scale_fill_manual(values =pal,
breaks='Covidestim',
labels = TeX(names(pal)),
name='') +
labs(y = "Estimated Infections",
title = paste0("Estimated Infections by Version (State Level)")) ggsave(here('presentation/figure/state_level_mi_ma.jpeg'), width =13, height=9)ratios <- corrected %>%
filter(vlabel == "$P(S_1|untested)$ Centered at Emp. Value") %>%
# filter(state %in% sample(unique(corrected$state),5)) %>%
mutate(ratio_undetected_lb = exp_cases_lb/ positive,
ratio_undected = exp_cases_median/positive,
ratio_undetected_ub = exp_cases_ub/ positive) %>%
group_by(state) %>%
mutate(m_ratio = median(ratio_undected)) %>%
ungroup() %>%
mutate(state=fct_reorder(state,m_ratio)) %>%
arrange(state)
states_ordered <-ratios$state %>% unique()
ratios %>%
# filter(state %in% states_ordered[1:n]) %>%
ggplot(aes(x=date,ymin=ratio_undetected_lb, ymax=ratio_undetected_ub)) +
geom_ribbon(
alpha = .8) +
# geom_linerange(aes(xmin = xmin,
# xmax=xmax,
# y = positive,
# color = 'obs')) +
facet_wrap(~state,
labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed)),
ncol = 5) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 60, size = 16),
plot.title=element_text(face = "bold",
hjust = .5,
size = 20,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 10),
plot.subtitle = ggtext::element_markdown(size = 25,margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 14)) +
scale_fill_manual(values =pal,
labels = TeX(names(pal)),
name='') +
labs(y = "Ratio of Estimated Infections to Observed Cases",
title = paste0("Ratio of Estimated to Observed Cases")) +
scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2),
nrow=2,
byrow=TRUE))